#####################
# Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
# Purpose: Provides adjusted p-values for table 9 from the Appendix (Main Treatment Effects, Pooling Night-Sleep Treatments and Including Multiple Hypothesis Testing Corrections). 
#          Takes both the unadjusted p-values from table_A9.do and the RI p-values from table_4_RI.do, performs the
#          step-down procedure, and exports the adjusted p-values.
# Last edited: 07 May 2021
#####################

rm(list = ls())

options(scipen = 99)

require(rio)
require(tidyverse)

# Load function that performs step-down adjustment 
source("Scripts/adjust_pval_step_down_fun.R")

# Load actual p-values
df.p = import("Datasets/pvals_unadjusted_table_break_work.csv") %>% 
  as_tibble() %>%
  rename_with(~gsub("1","",.x, fixed = TRUE))
  
# Divide them into the families that we want to correct p-values
df.family = df.p %>% 
  filter( var_name %in%  c("earnings", "wellbeing", "cogindex", "pref") ) %>% 
  mutate(p_val_nap = ifelse(is.na(p_val_nap), p_val_nap_pool, p_val_nap) ) %>% 
  select(- p_val_nap_pool) %>% 
  mutate(p_val_nap_break = ifelse(is.na(p_val_nap_break), p_val_nap, p_val_nap_break),
         p_val_nap_work  = ifelse(is.na(p_val_nap_work), p_val_nap, p_val_nap_work),
         p_val_break_work_diff  = ifelse(is.na(p_val_break_work_diff), p_val_nap, p_val_break_work_diff))

df.work = df.p %>% 
  filter( var_name %in%  c("prod", "typing", "output")) %>% 
  select(-p_val_nap)

# Load RI p-vals/t-stats
df.ri.pvals.raw = import("Datasets/mht_ri_pvals_table_pool.dta") %>% as_tibble()

# Transform t-stats into p-vals
t_to_pval = function(t){
  pval = 2*pnorm(-abs(t))
}

# Keep only pvals and t-stats
df.ri.pvals.all = df.ri.pvals.raw %>% 
  select(contains(c("tstat_", "p_"))) %>% 
  mutate(across(.cols = contains("tstat_"), .fns = t_to_pval))

# Rename t_stats to p_vals 
colnames(df.ri.pvals.all)[str_detect(colnames(df.ri.pvals.all), "tstat_")] = 
  str_replace(colnames(df.ri.pvals.all)[str_detect(colnames(df.ri.pvals.all), "tstat_")], pattern = "tstat_", replacement = "p_")

df.work.RI = df.ri.pvals.all %>% 
  select(contains(c("break", "work"))) %>% 
  select(!contains("earnings")) 

df.family.RI = df.ri.pvals.all %>% 
  select(contains(c("earnings", "wellbeing", "cogindex", "pref"))) 

# Function to get p-vals for different comparisons
get_adjust_pvals_family = function(df.original, df.RI){
  ### Correct family-wise outcomes ####
  # Night-sleep vs. Control
  p_val_original = df.original$p_val_ns
  p_val_RI_mat   = df.RI %>% select(!contains(c("nap", "_d_"))) %>% as.matrix
  
  p_val_mht_ns_cont = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Pooled Break and Work
  p_val_original = df.original$p_val_nap
  p_val_RI_mat   = df.RI %>% select(!contains(c("ns", "break", "work"))) %>% as.matrix
  
  p_val_mht_nap_pool = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Break
  p_val_original = df.original$p_val_nap_break
  p_val_RI_mat   = df.RI %>% 
    select(contains(c("break", "nap"))) %>% 
    select(!contains(c("_d_", "work", "pool"))) %>% 
    as.matrix
  
  p_val_mht_nap_break = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)

  # Nap vs. Work
  p_val_original = df.original$p_val_nap_work
  p_val_RI_mat   = df.RI %>% 
    select(contains(c("work", "nap"))) %>% 
    select(!contains(c("_d_", "break", "pool"))) %>% 
    as.matrix
  
  p_val_mht_nap_work = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
    
  df.res = tibble(
    comparison = rep(c("NS vs. Cont","Nap vs. Pool", "Nap vs. Break", "Nap vs. Work"),rep(nrow(df.original),4)),
    outcome = rep(df.original$var_name, 4),
    p_val_original = c(df.original$p_val_ns, df.original$p_val_nap, df.original$p_val_nap_break, df.original$p_val_nap_work),
    p_val_adj = c(p_val_mht_ns_cont, p_val_mht_nap_pool, p_val_mht_nap_break, p_val_mht_nap_work)
    )
  
  return(df.res)
  
}

get_adjust_pvals_work = function(df.original, df.RI){
  ### Correct family-wise outcomes ####
  
  # Nap vs. Break
  p_val_original = df.original$p_val_nap_break
  p_val_RI_mat   = df.RI %>% 
    select(contains(c("break"))) %>% 
    select(!contains(c("_d_"))) %>% 
    as.matrix
  
  p_val_mht_nap_break = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Break
  p_val_original = df.original$p_val_nap_work
  p_val_RI_mat   = df.RI %>% 
    select(contains(c("work"))) %>% 
    select(!contains(c("_d_"))) %>% 
    as.matrix
  
  p_val_mht_nap_work = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  df.res = tibble(
    comparison = rep(c("Nap vs. Break", "Nap vs. Work"),rep(nrow(df.original),2)),
    outcome = rep(df.original$var_name, 2),
    p_val_original = c(df.original$p_val_nap_break, df.original$p_val_nap_work),
    p_val_adj = c(p_val_mht_nap_break, p_val_mht_nap_work)
  )
  
  return(df.res)
  
}

# Correction across family-wide outcomes
df.pval.adj.family = get_adjust_pvals_family(df.family, df.family.RI)
# Correction across work outcomes
df.pval.adj.work   = get_adjust_pvals_work(df.work, df.work.RI)

df.pval.all = bind_rows(
  df.pval.adj.family,
  df.pval.adj.work
  )

export(df.pval.all, "Datasets/pval_adj_table_work_break.csv")





